'''
Created on 2012-4-20

@author: michaelh0226
'''
from scrapy.spider import BaseSpider 
from scrapy.selector import HtmlXPathSelector 

class DmozSpider(BaseSpider):
    """Spider for two dmoz.org Python directory listings.

    Starts from the URLs in ``start_urls`` and hands every downloaded
    page to :meth:`parse`, which prints the list entries it finds.
    """

    name = "dmoz.org"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Print title, link and description for each ``<ul>/<li>`` entry.

        For every ``<li>`` element that is a direct child of a ``<ul>``,
        the text of its child ``<a>`` elements, their ``href`` attributes
        and the ``<li>``'s own text nodes are extracted and written to
        standard output on one line.

        Args:
            response: The downloaded page; it is wrapped in an
                ``HtmlXPathSelector`` to run the XPath queries.

        Returns:
            None. Nothing is yielded, so no items or follow-up requests
            are produced from this callback.
        """
        hxs = HtmlXPathSelector(response)
        for site in hxs.select('//ul/li'):
            title = site.select('a/text()').extract()
            link = site.select('a/@href').extract()
            desc = site.select('text()').extract()
            # A single parenthesised argument is valid for both the
            # Python 2 print statement and the Python 3 print function,
            # so the module no longer fails to compile on Python 3.
            # Assuming extract() returns lists (TODO confirm against the
            # installed Scrapy version), the output is the same
            # space-separated text the old ``print title, link, desc``
            # produced.
            print("%s %s %s" % (title, link, desc))